# Point the session at the project folder.
# NOTE(review): when run inside a notebook chunk this change is undone as soon
# as the chunk finishes; the knitr `root.dir` option (set in the setup chunk)
# is the persistent way to do this. The file paths visible further down are
# absolute, so they do not depend on this call.
setwd("/home/mbotos/CLUSTER/Environments/2024_07_05_Lea_Lausanne_Hip_Tibia_BRBseq/")
Warning: The working directory was changed to /home/mbotos/CLUSTER/Environments/2024_07_05_Lea_Lausanne_Hip_Tibia_BRBseq inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
Path to the data: Environments/2024_07_05_Lea_Lausanne_Hip_Tibia_BRBseq
#library(BiocManager)
# Start from a clean session: remove every object in the global environment
# (all.names = TRUE also removes objects whose names start with a dot), then
# run the garbage collector, which prints a memory-usage table.
rm(list = ls(all.names = TRUE))
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 5420921 289.6 10014902 534.9 10014902 534.9
Vcells 12103103 92.4 45814420 349.6 57254624 436.9
.libPaths("/home/mbotos/R/x86_64-pc-linux-gnu-library/4.2/")
# BiocManager::install(c("dorothea"),force=TRUE,ask = TRUE)
# library(dorothea)
library(dplyr)
library(tidyr)
library(edgeR)
# Tibia count table: tab-separated text with a header row. Columns 1 and 2 are
# Gene_id and Gene_name; every further column holds the counts of one sample.
matrix_counts_tibia <- read.table(
  file = "/home/mbotos/CLUSTER/Environments/2024_07_05_Lea_Lausanne_Hip_Tibia_BRBseq/brbseq-nf-pipeline/results/AMP0211/count_matrix/L270524LF01_01.read.counts.sampleIDs.detailed.txt",
  header = TRUE,
  sep = "\t"
)
# Look at both ends of the table and at its dimensions.
head(matrix_counts_tibia)
tail(matrix_counts_tibia)
dim(matrix_counts_tibia)
[1] 60669 82
matrix_counts_tibia
# Hip count table: tab-separated text with a header row. Columns 1 and 2 are
# Gene_id and Gene_name; every further column holds the counts of one sample.
matrix_counts_hip <- read.table(
  file = "/home/mbotos/CLUSTER/Environments/2024_07_05_Lea_Lausanne_Hip_Tibia_BRBseq/brbseq-nf-pipeline/results/AMP0211/count_matrix/L270524LF02_01.read.counts.sampleIDs.detailed.txt",
  header = TRUE,
  sep = "\t"
)
# Look at both ends of the table and at its dimensions.
head(matrix_counts_hip)
tail(matrix_counts_hip)
dim(matrix_counts_hip)
[1] 60669 66
matrix_counts_hip
ncol(matrix_counts_tibia)
[1] 82
colnames(matrix_counts_tibia)
[1] "Gene_id" "Gene_name" "X80_tibia_non.sclerotic_adipocyte" "X102_tibia_non.sclerotic_adipocyte"
[5] "X107_tibia_non.sclerotic_adipocyte" "X130_Tibia_non.sclerotic_adipocyte" "X149_Tibia_non.sclerotic_adipocyte" "X155_Tibia_non.sclerotic_adipocyte"
[9] "X159_Tibia_non.sclerotic_adipocyte" "X161_Tibia_non.sclerotic_adipocyte" "X144_Tibia_non.sclerotic_adipocyte" "X128_tibia_non.sclerotic_adipocyte"
[13] "X80_tibia_sclerotic_adipocyte" "X102_tibia_sclerotic_adipocyte" "X107_tibia_sclerotic_adipocyte" "X130_Tibia_sclerotic_adipocyte"
[17] "X149_Tibia_sclerotic_adipocyte" "X155_Tibia_sclerotic_adipocyte" "X159_Tibia_sclerotic_adipocyte" "X161_Tibia_sclerotic_adipocyte"
[21] "X144_Tibia_sclerotic_adipocyte" "X128_tibia_sclerotic_adipocyte" "X80_tibia_non.sclerotic_pellet" "X102_tibia_non.sclerotic_pellet"
[25] "X107_tibia_non.sclerotic_pellet" "X130_Tibia_non.sclerotic_pellet" "X149_Tibia_non.sclerotic_pellet" "X155_Tibia_non.sclerotic_pellet"
[29] "X159_Tibia_non.sclerotic_pellet" "X161_Tibia_non.sclerotic_pellet" "X144_Tibia_non.sclerotic_pellet" "X128_tibia_non.sclerotic_pellet"
[33] "X80_tibia_sclerotic_pellet" "X102_tibia_sclerotic_pellet" "X107_tibia_sclerotic_pellet" "X130_Tibia_sclerotic_pellet"
[37] "X149_Tibia_sclerotic_pellet" "X155_Tibia_sclerotic_pellet" "X159_Tibia_sclerotic_pellet" "X161_Tibia_sclerotic_pellet"
[41] "X144_Tibia_sclerotic_pellet" "X128_tibia_sclerotic_pellet" "X104_tibia_non.sclerotic_adipocyte" "X105_tibia_non.sclerotic_adipocyte"
[45] "X109_tibia_non.sclerotic_adipocyte" "X148_Tibia_non.sclerotic_adipocyte" "X151_Tibia_non.sclerotic_adipocyte" "X156_Tibia_non.sclerotic_adipocyte"
[49] "X160_Tibia_non.sclerotic_adipocyte" "X163_Tibia_non.sclerotic_adipocyte" "X164_tibia_non.sclerotic_adipocyte" "X99_Tibia_non.sclerotic_adipocyte"
[53] "X104_tibia_sclerotic_adipocyte" "X105_tibia_sclerotic_adipocyte" "X109_tibia_sclerotic_adipocyte" "X148_Tibia_sclerotic_adipocyte"
[57] "X151_Tibia_sclerotic_adipocyte" "X156_Tibia_sclerotic_adipocyte" "X160_Tibia_sclerotic_adipocyte" "X163_Tibia_sclerotic_adipocyte"
[61] "X164_tibia_sclerotic_adipocyte" "X99_Tibia_sclerotic_adipocyte" "X104_tibia_non.sclerotic_pellet" "X105_tibia_non.sclerotic_pellet"
[65] "X109_tibia_non.sclerotic_pellet" "X148_Tibia_non.sclerotic_pellet" "X151_Tibia_non.sclerotic_pellet" "X156_Tibia_non.sclerotic_pellet"
[69] "X160_Tibia_non.sclerotic_pellet" "X163_Tibia_non.sclerotic_pellet" "X164_tibia_non.sclerotic_pellet" "X99_Tibia_non.sclerotic_pellet"
[73] "X104_tibia_sclerotic_pellet" "X105_tibia_sclerotic_pellet" "X109_tibia_sclerotic_pellet" "X148_Tibia_sclerotic_pellet"
[77] "X151_Tibia_sclerotic_pellet" "X156_Tibia_sclerotic_pellet" "X160_Tibia_sclerotic_pellet" "X163_Tibia_sclerotic_pellet"
[81] "X164_tibia_sclerotic_pellet" "X99_Tibia_sclerotic_pellet"
ncol(matrix_counts_hip)
[1] 66
colnames(matrix_counts_hip)
[1] "Gene_id" "Gene_name" "X74_Hip_non.sclerotic_adipocyte" "X79_Hip_non.sclerotic_adipocyte"
[5] "X125_Hip_non.sclerotic_adipocyte" "X129_Hip_non.sclerotic_adipocyte" "X145_Hip_non.sclerotic_adipocyte" "X150_Hip_non.sclerotic_adipocyte"
[9] "X153_Hip_non.sclerotic_adipocyte" "X157_Hip_non.sclerotic_adipocyte" "X74_Hip_sclerotic_adipocyte" "X79_Hip_sclerotic_adipocyte"
[13] "X125_Hip_sclerotic_adipocyte" "X129_Hip_sclerotic_adipocyte" "X145_Hip_sclerotic_adipocyte" "X150_Hip_sclerotic_adipocyte"
[17] "X153_Hip_sclerotic_adipocyte" "X157_Hip_sclerotic_adipocyte" "X74_Hip_non.sclerotic_pellet" "X79_Hip_non.sclerotic_pellet"
[21] "X125_Hip_non.sclerotic_pellet" "X129_Hip_non.sclerotic_pellet" "X145_Hip_non.sclerotic_pellet" "X150_Hip_non.sclerotic_pellet"
[25] "X153_Hip_non.sclerotic_pellet" "X157_Hip_non.sclerotic_pellet" "X74_Hip_sclerotic_pellet" "X79_Hip_sclerotic_pellet"
[29] "X125_Hip_sclerotic_pellet" "X129_Hip_sclerotic_pellet" "X145_Hip_sclerotic_pellet" "X150_Hip_sclerotic_pellet"
[33] "X153_Hip_sclerotic_pellet" "X157_Hip_sclerotic_pellet" "X78_Hip_non.sclerotic_adipocyte" "X108_Hip_non.sclerotic_adipocyte"
[37] "X126_Hip_non.sclerotic_adipocyte" "X143_Hip_non.sclerotic_adipocyte" "X146_Hip_non.sclerotic_adipocyte" "X152_Hip_non.sclerotic_adipocyte"
[41] "X154_Hip_non.sclerotic_adipocyte" "X158_Hip_non.sclerotic_adipocyte" "X78_Hip_sclerotic_adipocyte" "X108_Hip_sclerotic_adipocyte"
[45] "X126_Hip_sclerotic_adipocyte" "X143_Hip_sclerotic_adipocyte" "X146_Hip_sclerotic_adipocyte" "X152_Hip_sclerotic_adipocyte"
[49] "X154_Hip_sclerotic_adipocyte" "X158_Hip_sclerotic_adipocyte" "X78_Hip_non.sclerotic_pellet" "X108_Hip_non.sclerotic_pellet"
[53] "X126_Hip_non.sclerotic_pellet" "X143_Hip_non.sclerotic_pellet" "X146_Hip_non.sclerotic_pellet" "X152_Hip_non.sclerotic_pellet"
[57] "X154_Hip_non.sclerotic_pellet" "X158_Hip_non.sclerotic_pellet" "X78_Hip_sclerotic_pellet" "X108_Hip_sclerotic_pellet"
[61] "X126_Hip_sclerotic_pellet" "X143_Hip_sclerotic_pellet" "X146_Hip_sclerotic_pellet" "X152_Hip_sclerotic_pellet"
[65] "X154_Hip_sclerotic_pellet" "X158_Hip_sclerotic_pellet"
# Removing the last column is no longer needed: the current pipeline output
# has no trailing "Unknown" column any more. Gene_id and Gene_name stay in
# positions 1 and 2. The old column-removal call is kept for reference:
#matrix_counts <- matrix_counts[,-ncol(matrix_counts)]
# Remove the last 5 rows (presumably non-gene summary rows appended by the
# pipeline -- TODO confirm). head(x, -5) is used instead of 1:(nrow(x) - 5)
# because the latter counts backwards when the table has 5 rows or fewer.
matrix_counts_tibia <- head(matrix_counts_tibia, -5)
head(matrix_counts_tibia)
tail(matrix_counts_tibia)
dim(matrix_counts_tibia)
[1] 60664 82
colnames(matrix_counts_tibia)[3:ncol(matrix_counts_tibia)]
[1] "X80_tibia_non.sclerotic_adipocyte" "X102_tibia_non.sclerotic_adipocyte" "X107_tibia_non.sclerotic_adipocyte" "X130_Tibia_non.sclerotic_adipocyte"
[5] "X149_Tibia_non.sclerotic_adipocyte" "X155_Tibia_non.sclerotic_adipocyte" "X159_Tibia_non.sclerotic_adipocyte" "X161_Tibia_non.sclerotic_adipocyte"
[9] "X144_Tibia_non.sclerotic_adipocyte" "X128_tibia_non.sclerotic_adipocyte" "X80_tibia_sclerotic_adipocyte" "X102_tibia_sclerotic_adipocyte"
[13] "X107_tibia_sclerotic_adipocyte" "X130_Tibia_sclerotic_adipocyte" "X149_Tibia_sclerotic_adipocyte" "X155_Tibia_sclerotic_adipocyte"
[17] "X159_Tibia_sclerotic_adipocyte" "X161_Tibia_sclerotic_adipocyte" "X144_Tibia_sclerotic_adipocyte" "X128_tibia_sclerotic_adipocyte"
[21] "X80_tibia_non.sclerotic_pellet" "X102_tibia_non.sclerotic_pellet" "X107_tibia_non.sclerotic_pellet" "X130_Tibia_non.sclerotic_pellet"
[25] "X149_Tibia_non.sclerotic_pellet" "X155_Tibia_non.sclerotic_pellet" "X159_Tibia_non.sclerotic_pellet" "X161_Tibia_non.sclerotic_pellet"
[29] "X144_Tibia_non.sclerotic_pellet" "X128_tibia_non.sclerotic_pellet" "X80_tibia_sclerotic_pellet" "X102_tibia_sclerotic_pellet"
[33] "X107_tibia_sclerotic_pellet" "X130_Tibia_sclerotic_pellet" "X149_Tibia_sclerotic_pellet" "X155_Tibia_sclerotic_pellet"
[37] "X159_Tibia_sclerotic_pellet" "X161_Tibia_sclerotic_pellet" "X144_Tibia_sclerotic_pellet" "X128_tibia_sclerotic_pellet"
[41] "X104_tibia_non.sclerotic_adipocyte" "X105_tibia_non.sclerotic_adipocyte" "X109_tibia_non.sclerotic_adipocyte" "X148_Tibia_non.sclerotic_adipocyte"
[45] "X151_Tibia_non.sclerotic_adipocyte" "X156_Tibia_non.sclerotic_adipocyte" "X160_Tibia_non.sclerotic_adipocyte" "X163_Tibia_non.sclerotic_adipocyte"
[49] "X164_tibia_non.sclerotic_adipocyte" "X99_Tibia_non.sclerotic_adipocyte" "X104_tibia_sclerotic_adipocyte" "X105_tibia_sclerotic_adipocyte"
[53] "X109_tibia_sclerotic_adipocyte" "X148_Tibia_sclerotic_adipocyte" "X151_Tibia_sclerotic_adipocyte" "X156_Tibia_sclerotic_adipocyte"
[57] "X160_Tibia_sclerotic_adipocyte" "X163_Tibia_sclerotic_adipocyte" "X164_tibia_sclerotic_adipocyte" "X99_Tibia_sclerotic_adipocyte"
[61] "X104_tibia_non.sclerotic_pellet" "X105_tibia_non.sclerotic_pellet" "X109_tibia_non.sclerotic_pellet" "X148_Tibia_non.sclerotic_pellet"
[65] "X151_Tibia_non.sclerotic_pellet" "X156_Tibia_non.sclerotic_pellet" "X160_Tibia_non.sclerotic_pellet" "X163_Tibia_non.sclerotic_pellet"
[69] "X164_tibia_non.sclerotic_pellet" "X99_Tibia_non.sclerotic_pellet" "X104_tibia_sclerotic_pellet" "X105_tibia_sclerotic_pellet"
[73] "X109_tibia_sclerotic_pellet" "X148_Tibia_sclerotic_pellet" "X151_Tibia_sclerotic_pellet" "X156_Tibia_sclerotic_pellet"
[77] "X160_Tibia_sclerotic_pellet" "X163_Tibia_sclerotic_pellet" "X164_tibia_sclerotic_pellet" "X99_Tibia_sclerotic_pellet"
# Remove the last 5 rows (presumably non-gene summary rows appended by the
# pipeline -- TODO confirm). head(x, -5) is used instead of 1:(nrow(x) - 5)
# because the latter counts backwards when the table has 5 rows or fewer.
matrix_counts_hip <- head(matrix_counts_hip, -5)
head(matrix_counts_hip)
tail(matrix_counts_hip)
dim(matrix_counts_hip)
[1] 60664 66
colnames(matrix_counts_hip)[3:ncol(matrix_counts_hip)]
[1] "X74_Hip_non.sclerotic_adipocyte" "X79_Hip_non.sclerotic_adipocyte" "X125_Hip_non.sclerotic_adipocyte" "X129_Hip_non.sclerotic_adipocyte"
[5] "X145_Hip_non.sclerotic_adipocyte" "X150_Hip_non.sclerotic_adipocyte" "X153_Hip_non.sclerotic_adipocyte" "X157_Hip_non.sclerotic_adipocyte"
[9] "X74_Hip_sclerotic_adipocyte" "X79_Hip_sclerotic_adipocyte" "X125_Hip_sclerotic_adipocyte" "X129_Hip_sclerotic_adipocyte"
[13] "X145_Hip_sclerotic_adipocyte" "X150_Hip_sclerotic_adipocyte" "X153_Hip_sclerotic_adipocyte" "X157_Hip_sclerotic_adipocyte"
[17] "X74_Hip_non.sclerotic_pellet" "X79_Hip_non.sclerotic_pellet" "X125_Hip_non.sclerotic_pellet" "X129_Hip_non.sclerotic_pellet"
[21] "X145_Hip_non.sclerotic_pellet" "X150_Hip_non.sclerotic_pellet" "X153_Hip_non.sclerotic_pellet" "X157_Hip_non.sclerotic_pellet"
[25] "X74_Hip_sclerotic_pellet" "X79_Hip_sclerotic_pellet" "X125_Hip_sclerotic_pellet" "X129_Hip_sclerotic_pellet"
[29] "X145_Hip_sclerotic_pellet" "X150_Hip_sclerotic_pellet" "X153_Hip_sclerotic_pellet" "X157_Hip_sclerotic_pellet"
[33] "X78_Hip_non.sclerotic_adipocyte" "X108_Hip_non.sclerotic_adipocyte" "X126_Hip_non.sclerotic_adipocyte" "X143_Hip_non.sclerotic_adipocyte"
[37] "X146_Hip_non.sclerotic_adipocyte" "X152_Hip_non.sclerotic_adipocyte" "X154_Hip_non.sclerotic_adipocyte" "X158_Hip_non.sclerotic_adipocyte"
[41] "X78_Hip_sclerotic_adipocyte" "X108_Hip_sclerotic_adipocyte" "X126_Hip_sclerotic_adipocyte" "X143_Hip_sclerotic_adipocyte"
[45] "X146_Hip_sclerotic_adipocyte" "X152_Hip_sclerotic_adipocyte" "X154_Hip_sclerotic_adipocyte" "X158_Hip_sclerotic_adipocyte"
[49] "X78_Hip_non.sclerotic_pellet" "X108_Hip_non.sclerotic_pellet" "X126_Hip_non.sclerotic_pellet" "X143_Hip_non.sclerotic_pellet"
[53] "X146_Hip_non.sclerotic_pellet" "X152_Hip_non.sclerotic_pellet" "X154_Hip_non.sclerotic_pellet" "X158_Hip_non.sclerotic_pellet"
[57] "X78_Hip_sclerotic_pellet" "X108_Hip_sclerotic_pellet" "X126_Hip_sclerotic_pellet" "X143_Hip_sclerotic_pellet"
[61] "X146_Hip_sclerotic_pellet" "X152_Hip_sclerotic_pellet" "X154_Hip_sclerotic_pellet" "X158_Hip_sclerotic_pellet"
# wrong_names <- c("mHH3.12P15...F3neg","mHH3.12P28.EW...F3neg","mHH3.12P28.Ctrl...F3neg")
# colnames(matrix_counts)[3:104] <- gsub(x = colnames(matrix_counts)[3:104],pattern = "F3.$",replacement = "F3+")
# for (wrong_name in wrong_names){
# correct_name <- gsub(pattern = "P",replacement = ".P",x = wrong_name)
# colnames(matrix_counts)[colnames(matrix_counts) == wrong_name] <- correct_name
# }
#
#
# matrix_aspcs <- matrix_counts[3:ncol(matrix_counts)] |> dplyr::select(matches("ASPCs"))
# matrix_f3pos <- matrix_counts[3:ncol(matrix_counts)] |> dplyr::select(ends_with("F3+",))
# matrix_f3neg <- matrix_counts[3:ncol(matrix_counts)] |> dplyr::select(ends_with("F3neg"))
# Sample metadata sheet (one row per sample). Its `sampleID` column is what
# gets matched against the sample column names of the count tables.
md <- readxl::read_xlsx(path ="/home/mbotos/CLUSTER/Environments/2024_07_05_Lea_Lausanne_Hip_Tibia_BRBseq/Analysis/metadata/sample_info_minimal_SIT.xlsx")
New names:
• `RIN...13` -> `RIN...17`
• `RIN...20` -> `RIN...24`
md$sampleID
[1] "80_tibia_non sclerotic_adipocyte" "102_tibia_non sclerotic_adipocyte" "107_tibia_non sclerotic_adipocyte" "130_Tibia_non sclerotic_adipocyte"
[5] "149_Tibia_non sclerotic_adipocyte" "155_Tibia_non sclerotic_adipocyte" "159_Tibia_non sclerotic_adipocyte" "161_Tibia_non sclerotic_adipocyte"
[9] "144_Tibia_non sclerotic_adipocyte" "128_tibia_non sclerotic_adipocyte" "80_tibia_sclerotic_adipocyte" "102_tibia_sclerotic_adipocyte"
[13] "107_tibia_sclerotic_adipocyte" "130_Tibia_sclerotic_adipocyte" "149_Tibia_sclerotic_adipocyte" "155_Tibia_sclerotic_adipocyte"
[17] "159_Tibia_sclerotic_adipocyte" "161_Tibia_sclerotic_adipocyte" "144_Tibia_sclerotic_adipocyte" "128_tibia_sclerotic_adipocyte"
[21] "80_tibia_non sclerotic_pellet" "102_tibia_non sclerotic_pellet" "107_tibia_non sclerotic_pellet" "130_Tibia_non sclerotic_pellet"
[25] "149_Tibia_non sclerotic_pellet" "155_Tibia_non sclerotic_pellet" "159_Tibia_non sclerotic_pellet" "161_Tibia_non sclerotic_pellet"
[29] "144_Tibia_non sclerotic_pellet" "128_tibia_non sclerotic_pellet" "80_tibia_sclerotic_pellet" "102_tibia_sclerotic_pellet"
[33] "107_tibia_sclerotic_pellet" "130_Tibia_sclerotic_pellet" "149_Tibia_sclerotic_pellet" "155_Tibia_sclerotic_pellet"
[37] "159_Tibia_sclerotic_pellet" "161_Tibia_sclerotic_pellet" "144_Tibia_sclerotic_pellet" "128_tibia_sclerotic_pellet"
[41] "104_tibia_non sclerotic_adipocyte" "105_tibia_non sclerotic_adipocyte" "109_tibia_non sclerotic_adipocyte" "148_Tibia_non sclerotic_adipocyte"
[45] "151_Tibia_non sclerotic_adipocyte" "156_Tibia_non sclerotic_adipocyte" "160_Tibia_non sclerotic_adipocyte" "163_Tibia_non sclerotic_adipocyte"
[49] "164_tibia_non sclerotic_adipocyte" "99_Tibia_non sclerotic_adipocyte" "104_tibia_sclerotic_adipocyte" "105_tibia_sclerotic_adipocyte"
[53] "109_tibia_sclerotic_adipocyte" "148_Tibia_sclerotic_adipocyte" "151_Tibia_sclerotic_adipocyte" "156_Tibia_sclerotic_adipocyte"
[57] "160_Tibia_sclerotic_adipocyte" "163_Tibia_sclerotic_adipocyte" "164_tibia_sclerotic_adipocyte" "99_Tibia_sclerotic_adipocyte"
[61] "104_tibia_non sclerotic_pellet" "105_tibia_non sclerotic_pellet" "109_tibia_non sclerotic_pellet" "148_Tibia_non sclerotic_pellet"
[65] "151_Tibia_non sclerotic_pellet" "156_Tibia_non sclerotic_pellet" "160_Tibia_non sclerotic_pellet" "163_Tibia_non sclerotic_pellet"
[69] "164_tibia_non sclerotic_pellet" "99_Tibia_non sclerotic_pellet" "104_tibia_sclerotic_pellet" "105_tibia_sclerotic_pellet"
[73] "109_tibia_sclerotic_pellet" "148_Tibia_sclerotic_pellet" "151_Tibia_sclerotic_pellet" "156_Tibia_sclerotic_pellet"
[77] "160_Tibia_sclerotic_pellet" "163_Tibia_sclerotic_pellet" "164_tibia_sclerotic_pellet" "99_Tibia_sclerotic_pellet"
[81] "74_Hip_non sclerotic_adipocyte" "79_Hip_non sclerotic_adipocyte" "125_Hip_non sclerotic_adipocyte" "129_Hip_non sclerotic_adipocyte"
[85] "145_Hip_non sclerotic_adipocyte" "150_Hip_non sclerotic_adipocyte" "153_Hip_non sclerotic_adipocyte" "157_Hip_non sclerotic_adipocyte"
[89] "74_Hip_sclerotic_adipocyte" "79_Hip_sclerotic_adipocyte" "125_Hip_sclerotic_adipocyte" "129_Hip_sclerotic_adipocyte"
[93] "145_Hip_sclerotic_adipocyte" "150_Hip_sclerotic_adipocyte" "153_Hip_sclerotic_adipocyte" "157_Hip_sclerotic_adipocyte"
[97] "74_Hip_non sclerotic_pellet" "79_Hip_non sclerotic_pellet" "125_Hip_non sclerotic_pellet" "129_Hip_non sclerotic_pellet"
[101] "145_Hip_non sclerotic_pellet" "150_Hip_non sclerotic_pellet" "153_Hip_non sclerotic_pellet" "157_Hip_non sclerotic_pellet"
[105] "74_Hip_sclerotic_pellet" "79_Hip_sclerotic_pellet" "125_Hip_sclerotic_pellet" "129_Hip_sclerotic_pellet"
[109] "145_Hip_sclerotic_pellet" "150_Hip_sclerotic_pellet" "153_Hip_sclerotic_pellet" "157_Hip_sclerotic_pellet"
[113] "78_Hip_non sclerotic_adipocyte" "108_Hip_non sclerotic_adipocyte" "126_Hip_non sclerotic_adipocyte" "143_Hip_non sclerotic_adipocyte"
[117] "146_Hip_non sclerotic_adipocyte" "152_Hip_non sclerotic_adipocyte" "154_Hip_non sclerotic_adipocyte" "158_Hip_non sclerotic_adipocyte"
[121] "78_Hip_sclerotic_adipocyte" "108_Hip_sclerotic_adipocyte" "126_Hip_sclerotic_adipocyte" "143_Hip_sclerotic_adipocyte"
[125] "146_Hip_sclerotic_adipocyte" "152_Hip_sclerotic_adipocyte" "154_Hip_sclerotic_adipocyte" "158_Hip_sclerotic_adipocyte"
[129] "78_Hip_non sclerotic_pellet" "108_Hip_non sclerotic_pellet" "126_Hip_non sclerotic_pellet" "143_Hip_non sclerotic_pellet"
[133] "146_Hip_non sclerotic_pellet" "152_Hip_non sclerotic_pellet" "154_Hip_non sclerotic_pellet" "158_Hip_non sclerotic_pellet"
[137] "78_Hip_sclerotic_pellet" "108_Hip_sclerotic_pellet" "126_Hip_sclerotic_pellet" "143_Hip_sclerotic_pellet"
[141] "146_Hip_sclerotic_pellet" "152_Hip_sclerotic_pellet" "154_Hip_sclerotic_pellet" "158_Hip_sclerotic_pellet"
# Rewrite the metadata sample IDs so they match the count-table column names:
# every space becomes a dot and an "X" is put in front
# (e.g. "80_tibia_non sclerotic_adipocyte" -> "X80_tibia_non.sclerotic_adipocyte").
md_df <- dplyr::mutate(
  md,
  sampleID = paste0("X", gsub(" ", ".", sampleID))
)
# Display the data frame with formatted sample IDs
print(md_df)
md
md_df
NA
md_df$sampleID
[1] "X80_tibia_non.sclerotic_adipocyte" "X102_tibia_non.sclerotic_adipocyte" "X107_tibia_non.sclerotic_adipocyte" "X130_Tibia_non.sclerotic_adipocyte"
[5] "X149_Tibia_non.sclerotic_adipocyte" "X155_Tibia_non.sclerotic_adipocyte" "X159_Tibia_non.sclerotic_adipocyte" "X161_Tibia_non.sclerotic_adipocyte"
[9] "X144_Tibia_non.sclerotic_adipocyte" "X128_tibia_non.sclerotic_adipocyte" "X80_tibia_sclerotic_adipocyte" "X102_tibia_sclerotic_adipocyte"
[13] "X107_tibia_sclerotic_adipocyte" "X130_Tibia_sclerotic_adipocyte" "X149_Tibia_sclerotic_adipocyte" "X155_Tibia_sclerotic_adipocyte"
[17] "X159_Tibia_sclerotic_adipocyte" "X161_Tibia_sclerotic_adipocyte" "X144_Tibia_sclerotic_adipocyte" "X128_tibia_sclerotic_adipocyte"
[21] "X80_tibia_non.sclerotic_pellet" "X102_tibia_non.sclerotic_pellet" "X107_tibia_non.sclerotic_pellet" "X130_Tibia_non.sclerotic_pellet"
[25] "X149_Tibia_non.sclerotic_pellet" "X155_Tibia_non.sclerotic_pellet" "X159_Tibia_non.sclerotic_pellet" "X161_Tibia_non.sclerotic_pellet"
[29] "X144_Tibia_non.sclerotic_pellet" "X128_tibia_non.sclerotic_pellet" "X80_tibia_sclerotic_pellet" "X102_tibia_sclerotic_pellet"
[33] "X107_tibia_sclerotic_pellet" "X130_Tibia_sclerotic_pellet" "X149_Tibia_sclerotic_pellet" "X155_Tibia_sclerotic_pellet"
[37] "X159_Tibia_sclerotic_pellet" "X161_Tibia_sclerotic_pellet" "X144_Tibia_sclerotic_pellet" "X128_tibia_sclerotic_pellet"
[41] "X104_tibia_non.sclerotic_adipocyte" "X105_tibia_non.sclerotic_adipocyte" "X109_tibia_non.sclerotic_adipocyte" "X148_Tibia_non.sclerotic_adipocyte"
[45] "X151_Tibia_non.sclerotic_adipocyte" "X156_Tibia_non.sclerotic_adipocyte" "X160_Tibia_non.sclerotic_adipocyte" "X163_Tibia_non.sclerotic_adipocyte"
[49] "X164_tibia_non.sclerotic_adipocyte" "X99_Tibia_non.sclerotic_adipocyte" "X104_tibia_sclerotic_adipocyte" "X105_tibia_sclerotic_adipocyte"
[53] "X109_tibia_sclerotic_adipocyte" "X148_Tibia_sclerotic_adipocyte" "X151_Tibia_sclerotic_adipocyte" "X156_Tibia_sclerotic_adipocyte"
[57] "X160_Tibia_sclerotic_adipocyte" "X163_Tibia_sclerotic_adipocyte" "X164_tibia_sclerotic_adipocyte" "X99_Tibia_sclerotic_adipocyte"
[61] "X104_tibia_non.sclerotic_pellet" "X105_tibia_non.sclerotic_pellet" "X109_tibia_non.sclerotic_pellet" "X148_Tibia_non.sclerotic_pellet"
[65] "X151_Tibia_non.sclerotic_pellet" "X156_Tibia_non.sclerotic_pellet" "X160_Tibia_non.sclerotic_pellet" "X163_Tibia_non.sclerotic_pellet"
[69] "X164_tibia_non.sclerotic_pellet" "X99_Tibia_non.sclerotic_pellet" "X104_tibia_sclerotic_pellet" "X105_tibia_sclerotic_pellet"
[73] "X109_tibia_sclerotic_pellet" "X148_Tibia_sclerotic_pellet" "X151_Tibia_sclerotic_pellet" "X156_Tibia_sclerotic_pellet"
[77] "X160_Tibia_sclerotic_pellet" "X163_Tibia_sclerotic_pellet" "X164_tibia_sclerotic_pellet" "X99_Tibia_sclerotic_pellet"
[81] "X74_Hip_non.sclerotic_adipocyte" "X79_Hip_non.sclerotic_adipocyte" "X125_Hip_non.sclerotic_adipocyte" "X129_Hip_non.sclerotic_adipocyte"
[85] "X145_Hip_non.sclerotic_adipocyte" "X150_Hip_non.sclerotic_adipocyte" "X153_Hip_non.sclerotic_adipocyte" "X157_Hip_non.sclerotic_adipocyte"
[89] "X74_Hip_sclerotic_adipocyte" "X79_Hip_sclerotic_adipocyte" "X125_Hip_sclerotic_adipocyte" "X129_Hip_sclerotic_adipocyte"
[93] "X145_Hip_sclerotic_adipocyte" "X150_Hip_sclerotic_adipocyte" "X153_Hip_sclerotic_adipocyte" "X157_Hip_sclerotic_adipocyte"
[97] "X74_Hip_non.sclerotic_pellet" "X79_Hip_non.sclerotic_pellet" "X125_Hip_non.sclerotic_pellet" "X129_Hip_non.sclerotic_pellet"
[101] "X145_Hip_non.sclerotic_pellet" "X150_Hip_non.sclerotic_pellet" "X153_Hip_non.sclerotic_pellet" "X157_Hip_non.sclerotic_pellet"
[105] "X74_Hip_sclerotic_pellet" "X79_Hip_sclerotic_pellet" "X125_Hip_sclerotic_pellet" "X129_Hip_sclerotic_pellet"
[109] "X145_Hip_sclerotic_pellet" "X150_Hip_sclerotic_pellet" "X153_Hip_sclerotic_pellet" "X157_Hip_sclerotic_pellet"
[113] "X78_Hip_non.sclerotic_adipocyte" "X108_Hip_non.sclerotic_adipocyte" "X126_Hip_non.sclerotic_adipocyte" "X143_Hip_non.sclerotic_adipocyte"
[117] "X146_Hip_non.sclerotic_adipocyte" "X152_Hip_non.sclerotic_adipocyte" "X154_Hip_non.sclerotic_adipocyte" "X158_Hip_non.sclerotic_adipocyte"
[121] "X78_Hip_sclerotic_adipocyte" "X108_Hip_sclerotic_adipocyte" "X126_Hip_sclerotic_adipocyte" "X143_Hip_sclerotic_adipocyte"
[125] "X146_Hip_sclerotic_adipocyte" "X152_Hip_sclerotic_adipocyte" "X154_Hip_sclerotic_adipocyte" "X158_Hip_sclerotic_adipocyte"
[129] "X78_Hip_non.sclerotic_pellet" "X108_Hip_non.sclerotic_pellet" "X126_Hip_non.sclerotic_pellet" "X143_Hip_non.sclerotic_pellet"
[133] "X146_Hip_non.sclerotic_pellet" "X152_Hip_non.sclerotic_pellet" "X154_Hip_non.sclerotic_pellet" "X158_Hip_non.sclerotic_pellet"
[137] "X78_Hip_sclerotic_pellet" "X108_Hip_sclerotic_pellet" "X126_Hip_sclerotic_pellet" "X143_Hip_sclerotic_pellet"
[141] "X146_Hip_sclerotic_pellet" "X152_Hip_sclerotic_pellet" "X154_Hip_sclerotic_pellet" "X158_Hip_sclerotic_pellet"
# Split every sample ID on "_" into its four fields:
#   <sample number>_<tissue>_<health status>_<cell type>
md_df_split <- strsplit(as.character(md_df$sampleID), "_", fixed = TRUE)
# Create one metadata column per field. vapply() guarantees a character vector
# of the right length (sapply() can silently change its return type), and a
# missing field stays a real NA instead of becoming the string "NA".
md_df$sample_number <- vapply(md_df_split, function(parts) parts[1], character(1))
md_df$tissue <- vapply(md_df_split, function(parts) parts[2], character(1))
#md_df$health_status <- sapply(md_df_split, function(x) paste(x[3], x[4], sep = "_"))
md_df$health_status <- vapply(md_df_split, function(parts) parts[3], character(1))
md_df$tissue_celltype <- vapply(md_df_split, function(parts) parts[4], character(1))
# Lower-case copies: the tissue is spelled both "tibia" and "Tibia" in the IDs.
md_df$tissue_celltype_lowercase <- tolower(md_df$tissue_celltype)
md_df$tissue_lowercase <- tolower(md_df$tissue)
# full_name is the key used to join PCA results back onto the metadata.
md_df$full_name <- md_df$sampleID
# View the dataframe
md_df
NA
# Per-sample sheet that is joined onto the metadata through `Sample ID`.
agbmi_df <- readxl::read_xlsx(path ="/home/mbotos/CLUSTER/Environments/2024_07_05_Lea_Lausanne_Hip_Tibia_BRBseq/Analysis/metadata/age-gender-BRBseq.xlsx")
agbmi_df
# Put an "X" in front of each sample ID and replace spaces with dots
# ("non sclerotic" -> "non.sclerotic") so the IDs match md_df$sampleID.
agbmi_df <- agbmi_df |>
  dplyr::mutate(`Sample ID` = paste0("X", gsub(" ", ".", `Sample ID`)))
agbmi_df
# merge() keeps only IDs present in both tables, so a sample without a matching
# row would silently disappear from md_df (and from every later plot).
# Report such samples instead of dropping them unnoticed.
samples_without_match <- setdiff(md_df$sampleID, agbmi_df$`Sample ID`)
if (length(samples_without_match) > 0) {
  warning(
    "Samples dropped by the merge (no matching `Sample ID`): ",
    paste(samples_without_match, collapse = ", "),
    call. = FALSE
  )
}
md_df <- merge(x = md_df, y = agbmi_df, by.x = "sampleID", by.y = "Sample ID")
m <- prcomp(t(matrix_counts_hip[,3:ncol(matrix_counts_hip)]), scale. = TRUE, center = TRUE) Input Matrix: After transposing, each row represents a sample, and each column represents a gene. Interpretation: PCA will identify principal components based on the variance among samples across different genes. Results: The principal components will describe the variation between the samples. Each principal component (PC) is a linear combination of the gene expression values.
m <- prcomp(matrix_counts_hip[,3:ncol(matrix_counts_hip)], scale. = TRUE, center = TRUE) Input Matrix: Without transposing, each row represents a gene, and each column represents a sample. Interpretation: PCA will identify principal components based on the variance among genes across different samples. Results: The principal components will describe the variation between the genes. Each principal component (PC) is a linear combination of the sample values.
The choice of whether to transpose the data or not depends on what you want to analyze:
# Exploratory PCA on the raw hip counts (columns 3 onward are the samples).
# Variant 1: transposed, i.e. samples as rows and genes as columns.
# NOTE(review): prcomp() with scale. = TRUE stops on constant columns; the
# genes are not filtered here, so any gene with the same count in every sample
# (e.g. all zeros) would make this call fail -- confirm it runs.
m <- prcomp(t(matrix_counts_hip[,3:ncol(matrix_counts_hip)]), scale. = TRUE, center = TRUE)
# Variant 2: genes as rows and samples as columns. This assignment overwrites
# the `m` from variant 1.
m <- prcomp(matrix_counts_hip[,3:ncol(matrix_counts_hip)], scale. = TRUE, center = TRUE)
# If no results of DEG obtained, we reduce stringency, so lower the number.
ncol(matrix_counts_hip)
[1] 66
dim(matrix_counts_hip[,3:ncol(matrix_counts_hip)][rowSums(matrix_counts_hip[,3:ncol(matrix_counts_hip)]) >= 30,])
[1] 25139 64
dim(matrix_counts_tibia[,3:ncol(matrix_counts_tibia)][rowSums(matrix_counts_tibia[,3:ncol(matrix_counts_tibia)]) >= 30,])
[1] 25993 80
# row_sums <- rowSums(matrix_counts_hip[, 3:ncol(matrix_counts_hip)])
# matrix_counts_hip_filtered <- matrix_counts_hip[row_sums >= 30,]
# matrix_counts_hip_filtered
# Run a PCA on a count table and attach the sample metadata to the per-sample
# coordinates.
#
# Arguments:
#   matrix_data  data frame whose first two columns are gene identifiers and
#                whose remaining columns hold the counts of one sample each.
#   transpose    TRUE:  samples are the observations (rows) and genes the
#                       variables; per-sample coordinates are the PCA scores.
#                FALSE: genes are the observations and samples the variables;
#                       per-sample coordinates are the loadings (rotation).
#   filter_empty_samples_counts
#                a gene is kept only if its counts summed over all samples
#                reach at least this value.
#
# Returns a list with
#   matrix_raw      the unmodified input table,
#   matrix_pca      per-sample PC coordinates merged with the metadata
#                   (sample name in column "Row.names"),
#   percentVar_obj  fraction of the total variance explained by each PC.
#
# Uses the global data frame `md_df`; its `full_name` column must contain the
# sample column names of `matrix_data`. Samples without a metadata row are
# dropped by the merge.
run_pca_structuring <- function(matrix_data, transpose=TRUE,filter_empty_samples_counts=30){
  # Keep only the sample columns and the genes with enough reads overall.
  count_matrix <- as.matrix(matrix_data[, 3:ncol(matrix_data), drop = FALSE])
  enough_reads <- rowSums(count_matrix) >= filter_empty_samples_counts
  count_matrix <- count_matrix[enough_reads, , drop = FALSE]

  if (transpose) {
    # Transpose the *filtered* matrix (previously the unfiltered copy was
    # transposed and then never used, so `transpose` had no effect).
    pca_input <- t(count_matrix)
    # A gene with identical counts in every sample cannot be scaled to unit
    # variance and would make prcomp() stop, so such genes are dropped.
    gene_sd <- apply(pca_input, 2, sd)
    pca_input <- pca_input[, which(gene_sd > 0), drop = FALSE]
  } else {
    pca_input <- count_matrix
  }

  # Run PCA
  pca_fit <- prcomp(pca_input, scale. = TRUE, center = TRUE)
  # Fraction of variance explained by each PC
  percentVar_obj <- pca_fit$sdev^2 / sum(pca_fit$sdev^2)

  # Samples are rows of `x` when they were the observations and rows of
  # `rotation` when they were the variables.
  sample_coords <- if (transpose) pca_fit$x else pca_fit$rotation
  pca_with_md <- merge(
    x = as.data.frame(sample_coords),
    y = md_df,
    by.x = "row.names",
    by.y = "full_name"
  )

  list(
    matrix_raw = matrix_data,
    matrix_pca = pca_with_md,
    percentVar_obj = percentVar_obj
  )
}
# PCA runs with transpose = FALSE on the tibia and hip count tables; genes
# whose counts sum to less than 30 over all samples are filtered out first.
# Author's note: this looks at gene-level variability; the biological
# difference across samples is typically stronger, so it is harder to catch.
m_pca_tibia <- run_pca_structuring(matrix_data = matrix_counts_tibia,transpose = FALSE,filter_empty_samples_counts = 30)
m_pca_hip <- run_pca_structuring(matrix_data = matrix_counts_hip,transpose = FALSE,filter_empty_samples_counts = 30)
require("ggplot2")
# Scatter plot of two principal components with one labelled point per sample.
#
# Arguments:
#   pca_df            data frame with columns "PC1", "PC2", ... plus the
#                     columns named by `colors`, `shapes` and `text_column`.
#   colors            name of the column mapped to point colour.
#   pca, pcb          components for the x and y axis, e.g. "PC1" and "PC2".
#                     All digits in the string are used, so "PC10" works.
#   title             appended to the fixed prefix "PCA " in the plot title.
#   subtitle          plot subtitle.
#   colors_selection  accepted for interface compatibility; currently unused
#                     (the manual colour scale is disabled below).
#   shapes            name of the column mapped to point shape.
#   shapes_selection  accepted for interface compatibility; currently unused
#                     (the manual shape scale is disabled below).
#   alphas            point transparency.
#   text_column       name of the column printed as a label on each point.
#   percentVar_obj    numeric vector with the fraction of variance per PC; the
#                     entries for `pca` and `pcb` go into the axis labels.
#
# Returns the ggplot object.
plot_the_pca <- function(pca_df,colors,pca,pcb,title,subtitle,colors_selection,shapes,shapes_selection,alphas=0.6,text_column="Row.names",percentVar_obj) {
  # Component numbers: keep every digit (substr(x, 3, 3) kept only the first,
  # which turned "PC10" into PC1).
  pc_x <- as.integer(gsub("[^0-9]", "", pca))
  pc_y <- as.integer(gsub("[^0-9]", "", pcb))
  col_x <- paste0("PC", pc_x)
  col_y <- paste0("PC", pc_y)

  # Axis label of the form "PC1: 42.13% variance".
  axis_label <- function(pc_number) {
    paste0(
      "PC", pc_number, ": ",
      round(percentVar_obj[pc_number] * 100, digits = 2), "% variance"
    )
  }

  # Columns are looked up through the .data pronoun: referring to pca_df
  # inside aes() triggers ggplot2 warnings and puts the raw expression
  # (e.g. "pca_df[[colors]]") into the legend titles.
  n <- ggplot(
    data = pca_df,
    aes(
      x = .data[[col_x]],
      y = .data[[col_y]],
      color = .data[[colors]],
      shape = .data[[shapes]]
    )
  )
  # n <- n + geom_point(size=6)
  # Practically invisible jitter with a fixed seed, so the plot is reproducible.
  n <- n + geom_jitter(
    position = position_jitter(width = 0.000000001, height = 0.000000001, seed = 123456),
    size = 6,
    alpha = alphas
  )
  n <- n + labs(
    title = paste0("PCA ", title),
    subtitle = paste0(subtitle, ""),
    x = axis_label(pc_x),
    y = axis_label(pc_y),
    color = colors,
    shape = shapes
  )
  n <- n + theme(
    panel.grid = element_blank(),
    panel.border = element_rect(fill = "transparent"),
    panel.background = element_rect(fill = "transparent"),
    legend.title = element_text(size = 15),
    legend.text = element_text(size = 12)
  )
  # Black sample label on top of every point.
  n <- n + geom_text(aes(label = .data[[text_column]]), color = "black", size = 2)
  # n <- n + scale_color_manual(values = colors_selection)
  # n <- n + scale_shape_manual(values = shapes_selection)
  n
}
### run_pca_structuring() must be run before this plot: it creates the
### m_pca_tibia object used here, and md_df must already hold the colour,
### shape and label columns.
# plot_the_pca() prepends "PCA " to the title itself, so the title passed here
# leaves it out (otherwise the plot reads "PCA PCA of RNA-seq Raw Data").
plot_the_pca(
  pca_df = m_pca_tibia$matrix_pca,
  percentVar_obj = m_pca_tibia$percentVar_obj,
  colors = "tissue_celltype_lowercase",
  shapes = "health_status",
  pca = "PC1",
  pcb = "PC2",
  title = "of RNA-seq Raw Data",
  subtitle = "Tibias SVF and Adipocytes",
  text_column = "sample_number"
)
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[colors]]` is discouraged.
ℹ Use `.data[[colors]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
Warning: Use of `pca_df[[text_column]]` is discouraged.
ℹ Use `.data[[text_column]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
### run_pca_structuring() must be run before this plot: it creates the
### m_pca_hip object used here, and md_df must already hold the colour,
### shape and label columns.
# plot_the_pca() prepends "PCA " to the title itself, so the title passed here
# leaves it out (otherwise the plot reads "PCA PCA of RNA-seq Raw Data").
plot_the_pca(
  pca_df = m_pca_hip$matrix_pca,
  percentVar_obj = m_pca_hip$percentVar_obj,
  colors = "tissue_celltype_lowercase",
  shapes = "health_status",
  pca = "PC1",
  pcb = "PC2",
  title = "of RNA-seq Raw Data",
  subtitle = "Hips SVF and Adipocytes",
  text_column = "sample_number"
)
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[colors]]` is discouraged.
ℹ Use `.data[[colors]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
Warning: Use of `pca_df[[text_column]]` is discouraged.
ℹ Use `.data[[text_column]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
# Same PCA runs as before, this time requested with transpose = TRUE.
m_pca_tibia_t <- run_pca_structuring(
  matrix_data = matrix_counts_tibia,
  transpose = TRUE,
  filter_empty_samples_counts = 30
)
m_pca_hip_t <- run_pca_structuring(
  matrix_data = matrix_counts_hip,
  transpose = TRUE,
  filter_empty_samples_counts = 30
)
### run_pca_structuring() must be run before this plot: it creates the
### m_pca_tibia_t object used here, and md_df must already hold the colour,
### shape and label columns.
# plot_the_pca() prepends "PCA " to the title itself, so the title passed here
# leaves it out (otherwise the plot reads "PCA PCA of RNA-seq Raw Data").
plot_the_pca(
  pca_df = m_pca_tibia_t$matrix_pca,
  percentVar_obj = m_pca_tibia_t$percentVar_obj,
  colors = "tissue_celltype_lowercase",
  shapes = "health_status",
  pca = "PC1",
  pcb = "PC2",
  title = "of RNA-seq Raw Data",
  subtitle = "Tibias SVF and Adipocyte Transposed",
  text_column = "sample_number"
)
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[colors]]` is discouraged.
ℹ Use `.data[[colors]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
Warning: Use of `pca_df[[text_column]]` is discouraged.
ℹ Use `.data[[text_column]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
### run_pca_structuring() has to be run before plot_the_pca();
### otherwise the objects get mixed up.
# Generate the columns in the md_df
# PC1 vs PC2 for the transposed hip structuring result.
plot_the_pca(
  pca_df         = m_pca_hip_t$matrix_pca,
  percentVar_obj = m_pca_hip_t$percentVar_obj,
  colors         = "tissue_celltype_lowercase",
  shapes         = "health_status",
  pca            = "PC1",
  pcb            = "PC2",
  title          = "PCA of RNA-seq Raw Data",
  subtitle       = "Hips SVF and Adipocyte Transposed",
  text_column    = "sample_number"
)
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[colors]]` is discouraged.
ℹ Use `.data[[colors]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
Warning: Use of `pca_df[[text_column]]` is discouraged.
ℹ Use `.data[[text_column]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
# Tibia, adipocyte samples only: keep the two gene annotation columns
# plus every column whose name ends with "adipocyte", then structure
# the data for PCA (transposed) and plot PC1 vs PC2.
m_pca_tibia_transposed_adipocytes <- run_pca_structuring(
  matrix_data = matrix_counts_tibia |>
    dplyr::select("Gene_id", "Gene_name", ends_with("adipocyte")),
  transpose                   = TRUE,
  filter_empty_samples_counts = 30
)
plot_the_pca(
  pca_df         = m_pca_tibia_transposed_adipocytes$matrix_pca,
  percentVar_obj = m_pca_tibia_transposed_adipocytes$percentVar_obj,
  colors         = "tissue_celltype_lowercase",
  shapes         = "health_status",
  pca            = "PC1",
  pcb            = "PC2",
  title          = "PCA of RNA-seq Raw Data",
  subtitle       = "Tibias Adipocytes Transposed",
  text_column    = "sample_number"
)
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[colors]]` is discouraged.
ℹ Use `.data[[colors]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
Warning: Use of `pca_df[[text_column]]` is discouraged.
ℹ Use `.data[[text_column]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
# Samples flagged in the HTML report as having a low number of genes per sample:
#107 tibia non sclerotic adipocyte
#107 tibia sclerotic adipocyte
##99 tibia sclerotic adipocyte
#80 tibia non sclerotic pellet
#163 tibia sclerotic pellet
# Tibia, pellet samples only: keep the two gene annotation columns
# plus every column whose name ends with "pellet", then structure
# the data for PCA (transposed) and plot PC1 vs PC2.
m_pca_tibia_transposed_pellet <- run_pca_structuring(
  matrix_data = matrix_counts_tibia |>
    dplyr::select("Gene_id", "Gene_name", ends_with("pellet")),
  transpose                   = TRUE,
  filter_empty_samples_counts = 30
)
plot_the_pca(
  pca_df         = m_pca_tibia_transposed_pellet$matrix_pca,
  percentVar_obj = m_pca_tibia_transposed_pellet$percentVar_obj,
  colors         = "tissue_celltype_lowercase",
  shapes         = "health_status",
  pca            = "PC1",
  pcb            = "PC2",
  title          = "PCA of RNA-seq Raw Data",
  subtitle       = "Tibias Pellet Transposed",
  text_column    = "sample_number"
)
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[colors]]` is discouraged.
ℹ Use `.data[[colors]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
Warning: Use of `pca_df[[text_column]]` is discouraged.
ℹ Use `.data[[text_column]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
# Samples flagged in the HTML report as having a low number of genes per sample:
#80 tibia non sclerotic pellet
#163 tibia sclerotic pellet
# Hip, adipocyte samples only: keep the two gene annotation columns
# plus every column whose name ends with "adipocyte", then structure
# the data for PCA (transposed) and plot PC1 vs PC2.
m_pca_hip_transposed_adipocytes <- run_pca_structuring(
  matrix_data = matrix_counts_hip |>
    dplyr::select("Gene_id", "Gene_name", ends_with("adipocyte")),
  transpose                   = TRUE,
  filter_empty_samples_counts = 30
)
plot_the_pca(
  pca_df         = m_pca_hip_transposed_adipocytes$matrix_pca,
  percentVar_obj = m_pca_hip_transposed_adipocytes$percentVar_obj,
  colors         = "tissue_celltype_lowercase",
  shapes         = "health_status",
  pca            = "PC1",
  pcb            = "PC2",
  title          = "PCA of RNA-seq Raw Data",
  subtitle       = "Hips Adipocytes Transposed",
  text_column    = "sample_number"
)
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[colors]]` is discouraged.
ℹ Use `.data[[colors]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
Warning: Use of `pca_df[[text_column]]` is discouraged.
ℹ Use `.data[[text_column]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
# Samples flagged in the HTML report as having a low number of genes per sample; this is expected to be associated with a low number of sequencing reads per sample as well:
#74 hip non sclerotic adipocyte
#79 hip non sclerotic adipocyte
#150 hip non sclerotic adipocyte
#74 hip sclerotic adipocyte
#79 hip sclerotic adipocyte
#78 hip non sclerotic adipocyte
#78 hip sclerotic adipocyte
# Hip, pellet samples only: keep the two gene annotation columns
# plus every column whose name ends with "pellet", then structure
# the data for PCA (transposed) and plot PC1 vs PC2.
m_pca_hip_transposed_pellet <- run_pca_structuring(
  matrix_data = matrix_counts_hip |>
    dplyr::select("Gene_id", "Gene_name", ends_with("pellet")),
  transpose                   = TRUE,
  filter_empty_samples_counts = 30
)
plot_the_pca(
  pca_df         = m_pca_hip_transposed_pellet$matrix_pca,
  percentVar_obj = m_pca_hip_transposed_pellet$percentVar_obj,
  colors         = "tissue_celltype_lowercase",
  shapes         = "health_status",
  pca            = "PC1",
  pcb            = "PC2",
  title          = "PCA of RNA-seq Raw Data",
  subtitle       = "Hips Pellet Transposed",
  text_column    = "sample_number"
)
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[colors]]` is discouraged.
ℹ Use `.data[[colors]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
Warning: Use of `pca_df[[text_column]]` is discouraged.
ℹ Use `.data[[text_column]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
# Samples flagged in the HTML report as having a low number of genes per sample:
#74 Hip non sclerotic pellet
#74 Hip sclerotic pellet
#78 hip non.sclerotic pellet
#78 hip sclerotic pellet
# Print the md_df object for inspection.
md_df
# Same hip pellet PCA as before, but points are labelled with the
# "Age" column instead of the sample number.
plot_the_pca(
  pca_df         = m_pca_hip_transposed_pellet$matrix_pca,
  percentVar_obj = m_pca_hip_transposed_pellet$percentVar_obj,
  colors         = "tissue_celltype_lowercase",
  shapes         = "health_status",
  pca            = "PC1",
  pcb            = "PC2",
  title          = "PCA of RNA-seq Raw Data",
  subtitle       = "Hips Pellet Transposed",
  text_column    = "Age"
)
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[colors]]` is discouraged.
ℹ Use `.data[[colors]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
Warning: Use of `pca_df[[text_column]]` is discouraged.
ℹ Use `.data[[text_column]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
# Hip pellet PCA coloured by the "Gender" column and labelled with
# the "Age" column.
plot_the_pca(
  pca_df         = m_pca_hip_transposed_pellet$matrix_pca,
  percentVar_obj = m_pca_hip_transposed_pellet$percentVar_obj,
  colors         = "Gender",
  shapes         = "health_status",
  pca            = "PC1",
  pcb            = "PC2",
  title          = "PCA of RNA-seq Raw Data",
  subtitle       = "Hips Pellet Transposed",
  text_column    = "Age"
)
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[colors]]` is discouraged.
ℹ Use `.data[[colors]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
Warning: Use of `pca_df[[text_column]]` is discouraged.
ℹ Use `.data[[text_column]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pca, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pca, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` is discouraged.
ℹ Use `.data[[paste0("PC", substr(pcb, start = 3, stop = 3))]]` instead.
Warning: Use of `pca_df[[shapes]]` is discouraged.
ℹ Use `.data[[shapes]]` instead.
# ---- Tibia: total counts per sample -------------------------------
# Print the raw tibia matrix for inspection.
m_pca_tibia$matrix_raw
# matrix_counts_tibia
# m_pca_tibia$matrix_raw

# Sum every column from the third one onwards (the first two columns
# are skipped).
sums_mm <- colSums(m_pca_tibia$matrix_raw[3:ncol(m_pca_tibia$matrix_raw)])
# The row names of this data frame are taken from the names of the
# colSums() vector, i.e. the original (case-preserved) column names;
# only the `Column` variable is lower-cased below.
sums_mm <- data.frame(Column = names(sums_mm), Sum = sums_mm)
sums_mm$Column <- tolower(sums_mm$Column)

# Categorize the groups based on the Column names; anything that
# matches none of the first three patterns is labelled "TSP".
sums_mm$Group <- dplyr::case_when(
  grepl("tibia_non.sclerotic_adipocyte", sums_mm$Column) ~ "TNSA",
  grepl("tibia_sclerotic_adipocyte", sums_mm$Column) ~ "TSA",
  grepl("tibia_non.sclerotic_pellet", sums_mm$Column) ~ "TNSP",
  TRUE ~ "TSP"
)
sums_mm

# Create a custom color palette
color_palette <- c(
  "TNSA" = "#00296b",
  "TSA"  = "#1e91d0",
  "TNSP" = "#faa819",
  "TSP"  = "#f37520"
)
library(ggplot2)

# Reference lines for the plot: mean and 25th/75th/90th percentiles.
mean_sum <- mean(sums_mm$Sum)
quantiles <- quantile(sums_mm$Sum, probs = c(0.25, 0.75, 0.90))

# Bar plot of the per-sample sums, samples ordered by increasing sum.
# Unordered alternative:
# ggplot(sums_mm, aes(x = Column, y = Sum, fill = Group)) +
p <- ggplot(sums_mm, aes(x = reorder(Column, Sum), y = Sum, fill = Group)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Counts Sums",
    x = "Samples",
    y = "Sum of all counts"
  ) +
  scale_fill_manual(values = color_palette) +
  geom_hline(
    aes(yintercept = mean_sum, color = "Mean"),
    linetype = "dashed", size = 1
  ) +
  geom_hline(
    aes(yintercept = quantiles[1], color = "25th Percentile"),
    linetype = "dotted", size = 1
  ) +
  geom_hline(
    aes(yintercept = quantiles[2], color = "75th Percentile"),
    linetype = "dotted", size = 1
  ) +
  geom_hline(
    aes(yintercept = quantiles[3], color = "90th Percentile"),
    linetype = "dotted", size = 1
  ) +
  scale_color_manual(
    values = c("red", "blue", "green", "#903498"),
    breaks = c("Mean", "25th Percentile", "75th Percentile", "90th Percentile"),
    name = "Statistics"
  ) +
  theme(
    # Rotate x-axis labels for better visibility
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.text.y = element_text(size = 12),
    panel.background = element_blank(), # Remove panel background
    panel.grid.major = element_blank(), # Remove major grid lines
    panel.grid.minor = element_blank()  # Remove minor grid lines
  )
plotly::ggplotly(p)

# X163 TSP
# X99 TSA
# X107 TNSA TSA TNSP
# X80 TNSP TNSA TSP
# X105 TSP TNSP
# X148 TSP
# X104 TSA TSP
# X164 TSA
# X109 TNSP
# We filter out around the same proportions: the 25% quantile in TIBIA
# is around 700k and we filtered at 407265; in HIP the 25% quantile is
# 1M and we filtered at 791K.

# Show the samples that fall below the threshold ...
dplyr::filter(sums_mm, Sum < 407265)
# ... and keep the remaining ones.
sums_mm_tibia <- dplyr::filter(sums_mm, Sum >= 407265)
# ---- Hip: total counts per sample ---------------------------------
# Print the raw hip matrix for inspection.
m_pca_hip$matrix_raw
# matrix_counts_hip
# m_pca_hip$matrix_raw

# Sum every column from the third one onwards (the first two columns
# are skipped).
sums_mm <- colSums(m_pca_hip$matrix_raw[3:ncol(m_pca_hip$matrix_raw)])
# The row names of this data frame are taken from the names of the
# colSums() vector, i.e. the original (case-preserved) column names;
# only the `Column` variable is lower-cased below.
sums_mm <- data.frame(Column = names(sums_mm), Sum = sums_mm)
sums_mm$Column <- tolower(sums_mm$Column)

# Categorize the groups based on the Column names; anything that
# matches none of the first three patterns is labelled "HSP".
sums_mm$Group <- ifelse(
  grepl("hip_non.sclerotic_adipocyte", sums_mm$Column), "HNSA",
  ifelse(
    grepl("hip_sclerotic_adipocyte", sums_mm$Column), "HSA",
    ifelse(grepl("hip_non.sclerotic_pellet", sums_mm$Column), "HNSP", "HSP")
  )
)
sums_mm

# Create a custom color palette
color_palette <- c(
  "HNSA" = "#00296b",
  "HSA"  = "#1e91d0",
  "HNSP" = "#faa819",
  "HSP"  = "#f37520"
)
library(ggplot2)

# Reference lines for the plot: mean and 25th/75th/90th percentiles.
# FIX: the mean was previously stored in `c` (masking base::c) while
# the plot below reads `mean_sum`, so the "Mean" line of the hip plot
# was drawn at the mean left over from the tibia section.
mean_sum <- mean(sums_mm$Sum)
quantiles <- quantile(sums_mm$Sum, probs = c(0.25, 0.75, 0.90))

# Bar plot of the per-sample sums, samples ordered by increasing sum.
# Unordered alternative:
# ggplot(sums_mm, aes(x = Column, y = Sum, fill = Group)) +
p <- ggplot(sums_mm, aes(x = reorder(Column, Sum), y = Sum, fill = Group)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Counts Sums",
    x = "Samples",
    y = "Sum of all counts"
  ) +
  scale_fill_manual(values = color_palette) +
  geom_hline(
    aes(yintercept = mean_sum, color = "Mean"),
    linetype = "dashed", size = 1
  ) +
  geom_hline(
    aes(yintercept = quantiles[1], color = "25th Percentile"),
    linetype = "dotted", size = 1
  ) +
  geom_hline(
    aes(yintercept = quantiles[2], color = "75th Percentile"),
    linetype = "dotted", size = 1
  ) +
  geom_hline(
    aes(yintercept = quantiles[3], color = "90th Percentile"),
    linetype = "dotted", size = 1
  ) +
  scale_color_manual(
    values = c("red", "blue", "green", "#903498"),
    breaks = c("Mean", "25th Percentile", "75th Percentile", "90th Percentile"),
    name = "Statistics"
  ) +
  theme(
    # Rotate x-axis labels for better visibility
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.text.y = element_text(size = 12),
    panel.background = element_blank(), # Remove panel background
    panel.grid.major = element_blank(), # Remove major grid lines
    panel.grid.minor = element_blank()  # Remove minor grid lines
  )
plotly::ggplotly(p)

# X78 - 4
# X74 - 4
# X79 - 2 HNSA and HSA
# X150 HNSA
# X154 HNSP
# Removed all the samples with less than 791286 counts in Hip.
sums_mm_hip <- sums_mm |> dplyr::filter(Sum >= 791286)
# Show the samples that were removed, then the ones that were kept.
sums_mm |> dplyr::filter(Sum < 791286)
sums_mm_hip
NA
#tibia
#hip
#adipo
#pellet
#sclerotic
#non_sclerotic
#
# tibias-> sclerotic vs nsclerotic
# hip-> sclerotic vs nsclerotic
#
# Adipocyte -> tibia vs Hip
#
#
# Adipocytes
# Non-sclerotic vs Sclerotic of Hip
# Non-sclerotic vs Sclerotic of Tibia
# Non-sclerotic Tibia vs Non-sclerotic Hip
# Sclerotic Tibia vs Sclerotic Hip
#
#
# Pellet
# Non-sclerotic vs Sclerotic of Hip
# Non-sclerotic vs Sclerotic of Tibia
# Non-sclerotic Tibia vs Non-sclerotic Hip
# Sclerotic Tibia vs Sclerotic Hip
#
# Adipocytes vs Pellet results --> Venn
# Non sclerotic alone
# Sclerotic alone
# Print the per-sample count sums that passed the thresholds.
# (A former line `sums_mm_tibia sums_mm_hip` was removed: two bare
# symbols on one line do not parse in R, and it only duplicated the
# two print statements below.)
sums_mm_tibia
sums_mm_hip
#matrix_counts_tibia
#matrix_counts_hip

# Keep only the sample columns that passed the count-sum filter. The
# columns are selected by the row names of sums_mm_tibia / sums_mm_hip.
# drop = FALSE keeps the result two-dimensional even if a single
# sample survives the filter.
# NOTE(review): this selection relies on those row names being the
# original sample column names of matrix_raw -- confirm. If the sums
# were built from columns 3..ncol only, the first two columns of
# matrix_raw are not carried over; verify that this is intended for
# the DEG input.
m_pca_tibia$matrix_raw_filtered_samples_for_DEG <-
  m_pca_tibia$matrix_raw[, rownames(sums_mm_tibia), drop = FALSE]
m_pca_hip$matrix_raw_filtered_samples_for_DEG <-
  m_pca_hip$matrix_raw[, rownames(sums_mm_hip), drop = FALSE]
#using dplyr
#m_pca_tibia$matrix_raw_filtered_samples_for_DEG <- m_pca_tibia$matrix_raw |> select(all_of(rownames(sums_mm_tibia)))